We first need to clean up the data: the shot-zone columns must be converted to numeric types before we can compute combined totals and percentages.

# Coerce the shot-zone make/attempt columns to numeric, then derive combined
# 3-point and 2-point makes, attempts, and percentages for every row.
nba_24_25_playoff_shooting_data <- nba_24_25_playoff_shooting_data %>%
  mutate(
    # Entries that cannot be parsed as numbers become NA (with a warning).
    across(
      c(
        `Restricted Area FGM`, `Restricted Area FGA`,
        `In The Paint (Non-RA) FGM`, `In The Paint (Non-RA) FGA`,
        `Mid-Range FGM`, `Mid-Range FGA`,
        `Corner 3 FGM`, `Corner 3 FGA`,
        `Above the Break 3. FGM`, `Above the Break 3. FGA`
      ),
      as.numeric
    ),
    # Three-pointers: corner plus above-the-break.
    `3 FGM` = `Corner 3 FGM` + `Above the Break 3. FGM`,
    `3 FGA` = `Corner 3 FGA` + `Above the Break 3. FGA`,
    # Zero attempts are reported as 0% instead of the NaN that 0 / 0 yields.
    `3 FG%` = if_else(`3 FGA` == 0, 0, 100 * (`3 FGM` / `3 FGA`)),
    # Two-pointers: restricted area, non-RA paint, and mid-range.
    `2 FGM` = `Restricted Area FGM` + `In The Paint (Non-RA) FGM` +
      `Mid-Range FGM`,
    `2 FGA` = `Restricted Area FGA` + `In The Paint (Non-RA) FGA` +
      `Mid-Range FGA`,
    `2 FG%` = if_else(`2 FGA` == 0, 0, 100 * (`2 FGM` / `2 FGA`))
  )
## Warning: There were 4 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(...)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.

Introduction

The NBA playoffs are defined by high-stakes matchups, rapidly shifting strategies, and the constant search for competitive advantages. As modern basketball increasingly emphasizes spacing and perimeter shooting, an important analytical question emerges: to what extent does three-point shooting influence playoff success compared to two-point efficiency? In this report, we analyze team-level and player-level data from the 2024–2025 NBA playoffs to investigate how shooting performance across different zones—such as corner threes, above-the-break threes, and shots in the paint—relates to win percentage and overall postseason success. We aim to determine whether three-point percentage is a stronger predictor of playoff performance than two-point percentage, and which specific shooting areas contribute most to winning. We hypothesize that three-point efficiency is a significantly stronger predictor of playoff success than two-point efficiency in modern-day basketball, and that specific high-value shooting zones—particularly corner and above-the-break threes—provide meaningful competitive advantages for teams in the 2024–2025 NBA playoffs.

Background - Will

Analysis - Group

library(tidyverse)
library(dplyr)
library(readr)
library(ggplot2)
library(ggrepel)
# Team-level shot volume: total 3-point and 2-point attempts per team, joined
# to each team's playoff win percentage.
nba_summary <- nba_24_25_playoff_shooting_data %>%
  group_by(Team) %>%
  summarise(
    # na.rm = TRUE: the numeric coercion during cleanup leaves NA in some
    # player rows, and a single NA would otherwise turn the whole team total
    # into NA, silently dropping that team from the plot.
    total_3FGA = sum(`3 FGA`, na.rm = TRUE),
    total_2FGA = sum(`2 FGA`, na.rm = TRUE)
  ) %>%
  # full_join keeps teams that appear in only one of the two tables.
  full_join(
    nba_24_25_playoff_team_data %>% select(Team, `WIN%`),
    by = "Team"
  )

# Plot each team's attempt mix (3-point minus 2-point attempts) against win
# percentage, with a single least-squares trend line across all teams.
nba_summary %>%
  # Subtract in the order the column name and the x-axis label state
  # (3pt - 2pt); the previous 2pt - 3pt order mirrored the plot relative to
  # its own label.
  mutate(`3_2_pointDifference` = total_3FGA - total_2FGA) %>%
  ggplot(aes(x = `3_2_pointDifference`, y = `WIN%`, color = Team)) +
  # group = 1 fits one line overall instead of one per team color.
  geom_smooth(aes(group = 1), method = "lm", color = "black", se = FALSE) +
  geom_point(size = 3) +
  labs(
    x = "3pt FGA - 2pt FGA",
    y = "Win Percentage",
    title = "Impact of 2-pt vs 3-pt Attempts on Playoff Win %"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Analysis - Team 3PT Shooting by Zone

# Collapse player rows into one mean FG% per team for each 3-point zone, then
# reshape to long form (one row per team and zone) for plotting.
team_zone_avgs <- nba_24_25_playoff_shooting_data %>%
  group_by(Team) %>%
  summarise(
    # The corner columns are character, so they are coerced first; entries
    # that fail to parse become NA and are skipped by na.rm = TRUE.
    `Left Corner 3 FG%` = mean(
      as.numeric(`Left Corner 3. FG%`),
      na.rm = TRUE
    ),
    `Right Corner 3 FG%` = mean(
      as.numeric(`Right Corner 3. FG%`),
      na.rm = TRUE
    ),
    `Above the Break 3 FG%` = mean(`Above the Break 3. FG%`, na.rm = TRUE)
  ) %>%
  # Every column except Team is a zone average.
  pivot_longer(
    cols = !Team,
    names_to = "Zone",
    values_to = "FG_Percent"
  )
## Warning: There were 8 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `Left Corner 3 FG% = mean(as.numeric(`Left Corner 3. FG%`),
##   na.rm = TRUE)`.
## ℹ In group 8: `Team = "LAC"`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.

This chunk of code aggregates player-level three-point shooting data into team-level averages for the 2024–2025 NBA playoffs. Because our goal is to analyze how three-point shooting affects team win percentage and playoff success, we need a single shooting percentage per team per zone rather than multiple rows per player. Note that each team value is a simple (unweighted) mean of its players' percentages, so low-volume shooters count as much as high-volume ones.

# Attach each team's win percentage to the long-format zone averages.
team_zone_avgs <- left_join(
  team_zone_avgs,
  select(nba_24_25_playoff_team_data, Team, `WIN%`),
  by = "Team"
)

# One point per team and zone; point size encodes win percentage.
team_zone_avgs %>%
  ggplot(aes(x = Zone, y = FG_Percent, color = Team, size = `WIN%`)) +
  geom_point() +
  # Fixed label size overrides the WIN%-mapped size for the text layer.
  geom_text_repel(
    aes(label = Team),
    vjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  labs(
    title = "NBA 2024-2025 Playoff Teams: 3PT FG% by Zone with WIN%",
    x = "3-Point Zone",
    y = "FG%",
    size = "Win %"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# This code builds on the previous team-level aggregation of three-point shooting percentages and creates a scatter plot of 3PT shooting performance by zone for each playoff team, with team success (win percentage) shown as the size of each point. In the size legend, 0.2 is the smallest point, 0.4 a medium point, and 0.6 the largest point.

From these graphs we infer that the right corner three should be the strongest indicator of a team's success, as both OKC and IND made it to the Finals and both rank highest in right corner three FG%.

# Rebuild the per-team zone averages in wide form (one column per zone) and
# attach win percentage. This replaces the long-format team_zone_avgs.
team_zone_avgs <- nba_24_25_playoff_shooting_data %>%
  group_by(Team) %>%
  summarise(
    # Corner columns are character; unparseable entries become NA and are
    # dropped from the mean.
    Left_Corner_3_FG = mean(
      as.numeric(`Left Corner 3. FG%`),
      na.rm = TRUE
    ),
    Right_Corner_3_FG = mean(
      as.numeric(`Right Corner 3. FG%`),
      na.rm = TRUE
    ),
    Above_the_Break_3_FG = mean(`Above the Break 3. FG%`, na.rm = TRUE)
  ) %>%
  left_join(
    select(nba_24_25_playoff_team_data, Team, `WIN%`),
    by = "Team"
  )
## Warning: There were 8 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `Left_Corner_3_FG = mean(as.numeric(`Left Corner 3. FG%`), na.rm
##   = TRUE)`.
## ℹ In group 8: `Team = "LAC"`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
# Correlation tests for each zone
# Each test pairs a team's mean zone FG% with its win percentage. cor.test()
# is called with its defaults; the printed results report Pearson's
# product-moment correlation with a two-sided alternative.
cor_left <- cor.test(team_zone_avgs$Left_Corner_3_FG, team_zone_avgs$`WIN%`)
cor_right <- cor.test(team_zone_avgs$Right_Corner_3_FG, team_zone_avgs$`WIN%`)
cor_above <- cor.test(team_zone_avgs$Above_the_Break_3_FG, team_zone_avgs$`WIN%`)
# Print the left-corner test result.
cor_left
## 
##  Pearson's product-moment correlation
## 
## data:  team_zone_avgs$Left_Corner_3_FG and team_zone_avgs$`WIN%`
## t = 1.9832, df = 14, p-value = 0.06732
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03567938  0.78238802
## sample estimates:
##       cor 
## 0.4683087
# Print the right-corner test result.
cor_right
## 
##  Pearson's product-moment correlation
## 
## data:  team_zone_avgs$Right_Corner_3_FG and team_zone_avgs$`WIN%`
## t = 1.8329, df = 14, p-value = 0.08817
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07135323  0.76811658
## sample estimates:
##       cor 
## 0.4399119
# Print the above-the-break test result.
cor_above
## 
##  Pearson's product-moment correlation
## 
## data:  team_zone_avgs$Above_the_Break_3_FG and team_zone_avgs$`WIN%`
## t = 2.2866, df = 14, p-value = 0.03831
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0347359 0.8082434
## sample estimates:
##       cor 
## 0.5214623
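  • As shown above, all three zones have a moderate positive correlation with win percentage (r ≈ 0.44–0.52). The above-the-break three has the strongest correlation (r = 0.52) and is the only zone that is statistically significant at the 5% level (p = 0.038). This contradicts our earlier inference that the right corner three is the strongest indicator of a team's success: it actually had the weakest correlation (r = 0.44) and the largest p-value (p = 0.088) of the three zones.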
# Team-level lookup table holding only the team name and win percentage.
nba_win_perc <- select(nba_24_25_playoff_team_data, Team, `WIN%`)

# Attach win percentage to every player row, then keep each team's six
# highest-volume shooters, ranked by combined 2-point and 3-point attempts
# (players tied at the cutoff are all kept).
nba_sum <- nba_24_25_playoff_shooting_data %>%
  full_join(nba_win_perc, by = "Team") %>%
  group_by(Team) %>%
  slice_max(order_by = `2 FGA` + `3 FGA`, n = 6, with_ties = TRUE) %>%
  ungroup()

nba_sum
## # A tibble: 96 × 31
##    Player             Team    Age `Restricted Area FGM` `Restricted Area FGA`
##    <chr>              <chr> <dbl>                 <dbl>                 <dbl>
##  1 Jayson Tatum       BOS      27                   3.4                   4.5
##  2 Jaylen Brown       BOS      28                   2.8                   4.3
##  3 Derrick White      BOS      30                   1.7                   2.1
##  4 Payton Pritchard   BOS      27                   1.2                   1.7
##  5 Jrue Holiday       BOS      35                   2                     2.3
##  6 Kristaps Porziņģis BOS      29                   1.3                   2.2
##  7 Donovan Mitchell   CLE      28                   3.3                   5.4
##  8 Darius Garland     CLE      25                   0.6                   1  
##  9 Evan Mobley        CLE      24                   3.5                   4.3
## 10 Ty Jerome          CLE      27                   0.8                   1.7
## # ℹ 86 more rows
## # ℹ 26 more variables: `Restricted Area FG%` <dbl>,
## #   `In The Paint (Non-RA) FGM` <dbl>, `In The Paint (Non-RA) FGA` <dbl>,
## #   `In The Paint (Non-RA) FG%` <chr>, `Mid-Range FGM` <dbl>,
## #   `Mid-Range FGA` <dbl>, `Mid-Range FG%` <dbl>, `Left Corner 3. FGM` <chr>,
## #   `Left Corner 3. FGA` <chr>, `Left Corner 3. FG%` <chr>,
## #   `Right Corner 3. FGM` <chr>, `Right Corner 3. FGA` <chr>, …
# Wrap the top-shooter table in a SharedData object keyed by team so the plot
# and the filter widget can reference the same rows.
nba_shared_data <- SharedData$new(
  data = nba_sum,
  key = ~Team,
  group = "NBA_Data"
)


# Player-level scatter of 2-point vs 3-point makes, colored by team win
# percentage, with dashed reference lines at the mean of each axis.
nba_3v2FGM_plot <- ggplot(nba_shared_data, aes(x = `2 FGM`, y = `3 FGM`)) +
  geom_point(
    # `text` is not a ggplot2 aesthetic (hence the "unknown aesthetics"
    # warning); it is carried along for the ggplotly() tooltip.
    aes(
      color = `WIN%`,
      text = paste("Team:", Team, "\nWin %:", `WIN%`, "\nPlayer:", Player)
    ),
    size = 3
  ) +
  scale_color_gradientn(
    colors = c(
      "blue", "dodgerblue", "deepskyblue", "white",
      "orange", "red", "darkred"
    )
  ) +
  geom_vline(
    xintercept = mean(nba_sum$`2 FGM`),
    linetype = "dashed",
    alpha = 0.5
  ) +
  geom_hline(
    yintercept = mean(nba_sum$`3 FGM`),
    linetype = "dashed",
    alpha = 0.5
  ) +
  labs(
    title = "3 vs 2 Point Field Goals Made, Win% Gradient in 24-25 Playoffs",
    x = "2-Point FG Made",
    y = "3-Point FG Made"
  )
## Warning in geom_point(size = 3, aes(color = `WIN%`, text = paste("Team:", :
## Ignoring unknown aesthetics: text
# Convert the static plot to plotly, showing the custom text plus the x and y
# values in the hover tooltip.
interactive <- ggplotly(p = nba_3v2FGM_plot, tooltip = c("text", "x", "y"))

# Team dropdown built on the same shared data object as the plot.
team_filter <- filter_select(
  "team_select",
  "Select Team:",
  nba_shared_data,
  ~Team,
  allLevels = TRUE
)

# Render the filter followed by the plot.
browsable(tagList(team_filter, interactive))
# Team-level scatter of 2-point makes (all field goals minus threes) against
# 3-point makes, colored by win percentage and labeled by team, with dashed
# reference lines at the mean of each axis.
nba_24_25_playoff_team_data %>%
  ggplot(aes(x = `FGM` - `3PM`, y = `3PM`)) +
  geom_point(size = 3, aes(color = `WIN%`)) +
  scale_color_gradientn(
    colors = c(
      "blue", "dodgerblue", "deepskyblue", "white",
      "orange", "red", "darkred"
    )
  ) +
  geom_text_repel(
    aes(label = Team),
    vjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  # Pass the means as constants. Mapping them inside aes() recycles the value
  # to every row, so one line per team is drawn on top of the others and
  # alpha = 0.5 stacks into a nearly opaque line.
  geom_vline(
    xintercept = with(nba_24_25_playoff_team_data, mean(`FGM` - `3PM`)),
    linetype = "dashed",
    alpha = 0.5
  ) +
  geom_hline(
    yintercept = mean(nba_24_25_playoff_team_data$`3PM`),
    linetype = "dashed",
    alpha = 0.5
  ) +
  labs(
    y = "3-Point FG Made",
    x = "2-Point FG Made",
    title = "3 vs 2 Point Field Goals Made by Team in 24-25 Playoffs"
  )

Discussion - Alexander

Reference